rm(list=ls())
library(tidyverse)
library(tseries)
library(forecast)
library(TSA)
library(dplyr)
# source: https://streeteasy.com/blog/data-dashboard/?agg=Total&metric=Inventory&type=Rentals&bedrooms=Any%20Bedrooms&property=Any%20Property%20Type&minDate=2010-01-01&maxDate=2022-12-01&area=Flatiron,Brooklyn%20Heights
# note that we can also get breakdown by bedrooms if we want
# set up generalizable path without local path hard coded
base_path <- system('git rev-parse --show-toplevel', intern = T)
# rental index - one for each of four real boroughs
# note: we won't use the rental index as it is already smoothed via ARIMA
rental_index_raw <- read.csv(paste0(base_path, '/data/rentalIndex_All.csv'))
# median asking rent - four real boroughs + 155 neighborhoods
median_rent_raw <- read.csv(paste0(base_path, '/data/medianAskingRent_All.csv'))
# rental inventory - four real boroughs + 155 neighborhoods
inventory_raw <- read.csv(paste0(base_path, '/data/rentalInventory_All.csv'))
rental_index_manhattan <- ts(rental_index_raw$Manhattan,
start=c(2007,1),
frequency=12)
tsdisplay(rental_index_manhattan)
median_rent_manhattan <- median_rent_raw %>%
filter(areaName=="Manhattan") %>%
unlist() %>% as.numeric() %>%
na.omit() %>%
ts(start=c(2010,1),frequency=12)
Warning: NAs introduced by coercion
tsdisplay(median_rent_manhattan)
inventory_manhattan <- inventory_raw %>%
filter(areaName=="Manhattan") %>%
unlist() %>% as.numeric() %>%
na.omit() %>%
ts(start=c(2010,1),frequency=12)
Warning: NAs introduced by coercion
tsdisplay(inventory_manhattan)
rent_train <- window(median_rent_manhattan, start=2010, end=c(2020,1))
inventory_train <- window(inventory_manhattan, start=2010, end=c(2020,1))
rent_test <- window(median_rent_manhattan, start=c(2020,2), end=c(2022,12))
inventory_test <- window(inventory_manhattan, start=c(2020,2), end=c(2022,12))
# Box-Cox transformed
tsdisplay(BoxCox(rent_train,lambda='auto'))
tsdisplay(BoxCox(inventory_train,lambda='auto'))
# Median Rent
autoplot(median_rent_manhattan, main = "Median Rent - Manhattan", ylab="Median Rent ($)", xlab="Year") +
theme_classic() +
theme(legend.position="none") +
scale_y_continuous(breaks=c(seq(3000,4200,400))) +
geom_hline(yintercept=seq(2800,4200, by=200), size=0.1, linetype=2)
ggsave("rent_raw_series.png", width=128, height=96, units="mm")
# Inventory
autoplot(inventory_manhattan, main = "Inventory - Manhattan", ylab="Inventory", xlab="Year") +
theme_classic() +
theme(legend.position="none") +
scale_y_continuous(breaks=c(seq(10000,40000,10000))) +
geom_hline(yintercept=seq(10000,40000, by=10000), size=0.1, linetype=2)
ggsave("inventory_raw_series.png", width=128, height=96, units="mm")
#save(inventory_manhattan,file='data/inventory_manhattan.RData')
#save(median_rent_manhattan,file='data/median_rent_manhattan.RData')
plot((rent_train-mean(rent_train))/var(rent_train)/30)
lines((inventory_train-mean(inventory_train))/var(inventory_train),col='red')
rent_stationary <- diff(BoxCox(rent_train,lambda=0),differences=1,lag=1)
inventory_stationary <- diff(BoxCox(inventory_train,lambda=0),differences=1,lag=1)
plot((rent_stationary-mean(rent_stationary))/var(rent_stationary))
lines(((inventory_stationary-mean(inventory_stationary)))/var(inventory_stationary),col='red')
# median rent SARIMA
rent_lambda = BoxCox.lambda(rent_train)
rent_sarima <- auto.arima(rent_train,lambda=rent_lambda,trace=TRUE)
ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
ARIMA(0,1,0) with drift : 3141.802
ARIMA(1,1,0)(1,0,0)[12] with drift : 3128.999
ARIMA(0,1,1)(0,0,1)[12] with drift : 3134.978
ARIMA(0,1,0) : 3142.742
ARIMA(1,1,0) with drift : 3141.808
ARIMA(1,1,0)(2,0,0)[12] with drift : 3120.761
ARIMA(1,1,0)(2,0,1)[12] with drift : 3122.333
ARIMA(1,1,0)(1,0,1)[12] with drift : Inf
ARIMA(0,1,0)(2,0,0)[12] with drift : 3119.227
ARIMA(0,1,0)(1,0,0)[12] with drift : 3128.281
ARIMA(0,1,0)(2,0,1)[12] with drift : 3121.207
ARIMA(0,1,0)(1,0,1)[12] with drift : 3120.316
ARIMA(0,1,1)(2,0,0)[12] with drift : 3120.846
ARIMA(1,1,1)(2,0,0)[12] with drift : Inf
ARIMA(0,1,0)(2,0,0)[12] : 3118.965
ARIMA(0,1,0)(1,0,0)[12] : 3128.295
ARIMA(0,1,0)(2,0,1)[12] : 3120.657
ARIMA(0,1,0)(1,0,1)[12] : Inf
ARIMA(1,1,0)(2,0,0)[12] : 3120.213
ARIMA(0,1,1)(2,0,0)[12] : 3120.35
ARIMA(1,1,1)(2,0,0)[12] : Inf
Best model: ARIMA(0,1,0)(2,0,0)[12]
checkresiduals(rent_sarima)
Best rent SARIMA: nonseasonal first difference, seasonal AR(2), no drift. This is a parsimonous model with white noise residuals. Adding drift, nonseasonal AR(1) and seasonal MA(1) only slightly increase AICc. RMSE 31.8, MAE 24,08806, MAPE 0.7533599.
inventory_lambda = 0 # Note: As determined in Wesley's code, we will just use a log transform for the inventory series.
inventory_sarima <- auto.arima(inventory_train,lambda=inventory_lambda,trace=TRUE)
ARIMA(2,1,2)(1,1,1)[12] : -380.197
ARIMA(0,1,0)(0,1,0)[12] : -369.8573
ARIMA(1,1,0)(1,1,0)[12] : -384.1577
ARIMA(0,1,1)(0,1,1)[12] : -388.6616
ARIMA(0,1,1)(0,1,0)[12] : -369.3899
ARIMA(0,1,1)(1,1,1)[12] : -386.5875
ARIMA(0,1,1)(0,1,2)[12] : -386.5754
ARIMA(0,1,1)(1,1,0)[12] : -384.1396
ARIMA(0,1,1)(1,1,2)[12] : -384.4064
ARIMA(0,1,0)(0,1,1)[12] : -390.7689
ARIMA(0,1,0)(1,1,1)[12] : -388.737
ARIMA(0,1,0)(0,1,2)[12] : -388.7251
ARIMA(0,1,0)(1,1,0)[12] : -385.7352
ARIMA(0,1,0)(1,1,2)[12] : -386.5969
ARIMA(1,1,0)(0,1,1)[12] : -388.6629
ARIMA(1,1,1)(0,1,1)[12] : -386.6616
Best model: ARIMA(0,1,0)(0,1,1)[12]
checkresiduals(inventory_sarima)
Ljung-Box test
data: Residuals from ARIMA(0,1,0)(0,1,1)[12]
Q* = 29.482, df = 23, p-value = 0.1649
Model df: 1. Total lags used: 24
summary(inventory_sarima)
Series: inventory_train
ARIMA(0,1,0)(0,1,1)[12]
Box Cox transformation: lambda= 0
Coefficients:
sma1
-0.5795
s.e. 0.1068
sigma^2 = 0.00147: log likelihood = 197.44
AIC=-390.88 AICc=-390.77 BIC=-385.52
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -11.1729 673.5361 481.7169 0.04558128 2.686797 0.2295113 -0.01625024
Best inventory SARIMA: nonseasonal first difference, seasonal first-differenced MA(1). Residuals are mostly white noise - possibly some quarterly / two-year autocorrelations, but might also just be multiple testing (Ljung-Box doesn’t reject). Similarly to above, there are a number of models with very close AICc, though none with drift. RMSE 673.5361, MAE 481.7169, MAPE 2.686797.
rent_ets <- ets(rent_train, lambda = rent_lambda) # note that we get AAA without lambda
summary(rent_ets)
ETS(A,Ad,A)
Call:
ets(y = rent_train, lambda = rent_lambda)
Box-Cox transformation: lambda= 1.9999
Smoothing parameters:
alpha = 0.9996
beta = 2e-04
gamma = 3e-04
phi = 0.9744
Initial states:
l = 3945991.9135
b = 60001.9011
s = -61679.68 -540.6751 47843.86 146169.6 45911.49 43593.8
85571.15 74948.37 20094.2 -137340.3 -123001.8 -141570.1
sigma: 100911
AIC AICc BIC
3386.294 3393.000 3436.618
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.194433 29.52856 22.48471 -0.003752955 0.7053597 0.241986 0.1618703
checkresiduals(rent_ets)
Ljung-Box test
data: Residuals from ETS(A,Ad,A)
Q* = 37.477, df = 7, p-value = 3.809e-06
Model df: 17. Total lags used: 24
inventory_ets <- ets(inventory_train) # don't force lambda, because that forces additive model
summary(inventory_ets)
ETS(M,Ad,M)
Call:
ets(y = inventory_train)
Smoothing parameters:
alpha = 0.9884
beta = 0.0875
gamma = 1e-04
phi = 0.98
Initial states:
l = 13214.4939
b = 140.4349
s = 0.856 0.9087 0.9799 0.991 1.1088 1.146
1.1204 1.0792 1.0181 0.9735 0.9027 0.9156
sigma: 0.0367
AIC AICc BIC
2155.190 2161.895 2205.514
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -26.60746 621.3574 469.1133 -0.1277442 2.705515 0.2235064 0.02335815
checkresiduals(inventory_ets)
Ljung-Box test
data: Residuals from ETS(M,Ad,M)
Q* = 27.818, df = 7, p-value = 0.0002373
Model df: 17. Total lags used: 24
Rent ETS: AAA (additive error, trend, seasonality). alpha = 0.9996 beta = 2e-04 gamma = 3e-04 phi = 0.9744 Dominated by error, with very little trend or seasonality. Only slightly damped. Residuals mostly look like white noise, maybe 1.5 year seasonality, but Ljung-Box rejects at high significance level. Training RMSE 29.52856, MAE 22.48471, MAPE 0.7053597.
Inventory ETS: MAM (multiplicative error, additive trend, multiplicative seasonality). alpha = 0.9884 beta = 0.0875 gamma = 1e-04 phi = 0.98 Dominated by error, with some trend contribution and very little seasonality. Only slightly damped. Residuals mostly look like white noise but Ljung-Box rejects at high significance level. RMSE 621.3574, MAE 469.1133, MAPE 2.705515.
source('cross_validation.R')
# rent
output_arima <- cross.validate(rent_SARIMA_model,rent_train,80,12)
output_ets <- cross.validate(rent_ets_model,rent_train,80,12)
generate.plots(output_arima,output_ets)
# save
arima_expanding_errors <- data.frame(matrix(unlist(output_arima[[2]]), ncol = 12, byrow = TRUE))
ets_expanding_errors <- data.frame(matrix(unlist(output_ets[[2]]), ncol = 12, byrow = TRUE))
rent_arima_expanding_rmse <- sqrt(rowMeans((arima_expanding_errors)**2,na.rm=TRUE))
rent_ets_expanding_rmse <- sqrt(rowMeans((ets_expanding_errors)**2,na.rm=TRUE))
#save(rent_arima_expanding_rmse,file='data/rent_arima_expanding_rmse.RData')
#save(rent_ets_expanding_rmse,file='data/rent_ets_expanding_rmse.RData')
# inventory
output_arima <- cross.validate(inventory_SARIMA_model,inventory_train,80,12)
output_ets <- cross.validate(inventory_ets_model,inventory_train,80,12)
generate.plots(output_arima,output_ets)
# save
arima_expanding_errors <- data.frame(matrix(unlist(output_arima[[2]]), ncol = 12, byrow = TRUE))
ets_expanding_errors <- data.frame(matrix(unlist(output_ets[[2]]), ncol = 12, byrow = TRUE))
inventory_arima_expanding_rmse <- sqrt(rowMeans((arima_expanding_errors)**2,na.rm=TRUE))
inventory_ets_expanding_rmse <- sqrt(rowMeans((ets_expanding_errors)**2,na.rm=TRUE))
save(inventory_arima_expanding_rmse,file='data/inventory_arima_expanding_rmse.RData')
save(inventory_ets_expanding_rmse,file='data/inventory_ets_expanding_rmse.RData')
# rent ARIMA
rent_covid <- ts(median_rent_manhattan[(length(rent_train)+1):length(median_rent_manhattan)],frequency=12,start=c(2020,2))
plot(forecast(rent_sarima,length(rent_covid)))
lines(rent_covid)
rent_effect <- rent_covid-forecast(rent_sarima,length(rent_covid))$mean
upper_conf <- forecast(rent_sarima,length(rent_covid))$upper[,2]-forecast(rent_sarima,length(rent_covid))$mean
lower_conf <- forecast(rent_sarima,length(rent_covid))$lower[,2]-forecast(rent_sarima,length(rent_covid))$mean
# plot(rent_effect,main='Estimated COVID Effect on Rent (via ARIMA)')
# lines(upper_conf,col='blue')
# lines(lower_conf,col='blue')
# abline(h=0)
# mean(rent_effect)
autoplot(rent_effect, main = "Estimated COVID Effect on Rent (via ARIMA)", ylab="Rent Effect", xlab="Year") +
geom_hline(yintercept=0, size=0.5, linetype=1) +
theme_classic() +
theme(legend.position="none") +
scale_y_continuous(breaks=c(seq(-1000,600,200))) +
geom_hline(yintercept=seq(-1000, 600, by=200), size=0.1, linetype=2) +
labs(caption = "Effect in relation to pre-COVID model predictions.")
ggsave("rent_covid_effect.png", width=128, height=96, units="mm")
NA
# inventory ARIMA
inventory_covid <- ts(inventory_manhattan[(length(rent_train)+1):length(inventory_manhattan)],frequency=12,start=c(2020,2))
plot(forecast(inventory_sarima,length(inventory_covid)))
lines(inventory_covid)
inventory_effect <- inventory_covid-forecast(inventory_sarima,length(inventory_covid))$mean
upper_conf <- forecast(inventory_sarima,length(inventory_covid))$upper[,2]-forecast(inventory_sarima,length(inventory_covid))$mean
# plot(inventory_effect,main='Estimated COVID Effect on Inventory (via ARIMA)')
# lines(upper_conf,col='blue')
# abline(h=0)
# mean(inventory_effect)
autoplot(inventory_effect, main = "Estimated COVID Effect on Inventory (via ARIMA)", ylab="Inventory Effect", xlab="Year") +
geom_hline(yintercept=0, size=0.5, linetype=1) +
theme_classic() +
theme(legend.position="none") +
scale_y_continuous(breaks=c(seq(-6000,26000,6000))) +
geom_hline(yintercept=seq(-6000, 26000, by=6000), size=0.1, linetype=2) +
labs(caption = "Effect in relation to pre-COVID model predictions.")
ggsave("inventory_covid_effect.png", width=128, height=96, units="mm")
# rent ETS
plot(forecast(rent_ets,length(rent_covid)))
lines(rent_covid)
rent_effect_ets <- rent_covid-forecast(rent_ets,length(rent_covid))$mean
upper_conf <- forecast(rent_ets,length(rent_covid))$upper[,2]-forecast(rent_ets,length(rent_covid))$mean
lower_conf <- forecast(rent_ets,length(rent_covid))$lower[,2]-forecast(rent_ets,length(rent_covid))$mean
plot(rent_effect_ets,main='Estimated COVID Effect on Rent (via ETS)')
lines(upper_conf,col='blue')
lines(lower_conf,col='blue')
abline(h=0)
mean(rent_effect_ets)
[1] -149.7264
# inventory ETS
plot(forecast(inventory_ets,length(inventory_covid)))
lines(inventory_covid)
inventory_effect_ets <- inventory_covid-forecast(inventory_ets,length(inventory_covid))$mean
upper_conf <- forecast(inventory_ets,length(inventory_covid))$upper[,2]-forecast(inventory_ets,length(inventory_covid))$mean
plot(inventory_effect_ets,main='Estimated COVID Effect on Inventory (via ETS)')
lines(upper_conf,col='blue')
abline(h=0)
mean(rent_effect_ets)
[1] -149.7264
intervention_vec <- rep(c(0,1), c(122, 34))
# allow free estimation
summary(auto.arima(median_rent_manhattan,xreg=intervention_vec,lambda=rent_lambda))
Series: median_rent_manhattan
Regression with ARIMA(5,1,0)(2,0,1)[12] errors
Box Cox transformation: lambda= 1.999924
Coefficients:
ar1 ar2 ar3 ar4 ar5 sar1 sar2 sma1 xreg
0.2393 0.3747 0.2056 -0.2273 0.1387 0.5785 0.2231 -0.5980 199019.2
s.e. 0.0824 0.0856 0.0895 0.0865 0.0859 0.2305 0.1110 0.2299 116431.3
sigma^2 = 1.848e+10: log likelihood = -2049.35
AIC=4118.69 AICc=4120.22 BIC=4149.13
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 2.82655 40.17769 31.31056 0.09214803 0.9687078 0.1408447 0.02401528
summary(auto.arima(inventory_manhattan,xreg=intervention_vec,lambda=inventory_lambda))
Series: inventory_manhattan
Regression with ARIMA(2,1,0)(2,0,0)[12] errors
Box Cox transformation: lambda= 0
Coefficients:
ar1 ar2 sar1 sar2 xreg
0.3703 0.2798 0.2748 0.4197 -0.2299
s.e. 0.0897 0.1001 0.0707 0.0732 0.0642
sigma^2 = 0.003774: log likelihood = 210.95
AIC=-409.91 AICc=-409.34 BIC=-391.65
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -92.25497 1222.635 796.6559 -0.2457241 4.167508 0.1608239 0.1695361
# specify models as above
summary(Arima(median_rent_manhattan,c(0,1,0),c(2,0,0),xreg=intervention_vec,lambda=rent_lambda))
Series: median_rent_manhattan
Regression with ARIMA(0,1,0)(2,0,0)[12] errors
Box Cox transformation: lambda= 1.999924
Coefficients:
sar1 sar2 xreg
-0.0500 0.1024 -17881.39
s.e. 0.0945 0.1094 171662.35
sigma^2 = 3.005e+10: log likelihood = -2088.34
AIC=4184.67 AICc=4184.94 BIC=4196.85
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 8.724534 50.98727 34.51863 0.2374683 1.053643 0.1552756 0.3987022
summary(Arima(inventory_manhattan,order=c(0,1,0),seasonal=c(0,1,1),xreg=intervention_vec,lambda=inventory_lambda))
Series: inventory_manhattan
Regression with ARIMA(0,1,0)(0,1,1)[12] errors
Box Cox transformation: lambda= 0
Coefficients:
sma1 xreg
-0.8952 -0.1444
s.e. 0.1001 0.0695
sigma^2 = 0.004918: log likelihood = 168.89
AIC=-331.78 AICc=-331.61 BIC=-322.89
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 5.469475 1579.887 841.8457 -0.1534565 3.995638 0.1699466 0.7245131